import requests
from lxml import etree

def first_or_none(items):
    """Return the first element of *items*, or ``None`` when *items* is empty."""
    return items[0] if items else None


def fetch_page(url, headers, timeout=10):
    """Download *url* and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers (a browser-like User-Agent is sent
            by the caller).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    # Without a timeout, requests can block forever on a stalled server.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    # When the Content-Type header carries no charset, requests falls back
    # to ISO-8859-1, which garbles Chinese text; in that case use the
    # encoding detected from the body instead.
    if response.encoding is None or response.encoding.lower() == "iso-8859-1":
        response.encoding = response.apparent_encoding
    return response.text


def main():
    """Fetch the category page and print the heading text of each card."""
    # Fetch the page source.
    url = "https://www.shicimingju.com/category/all"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
    page_text = fetch_page(url, headers)

    tree = etree.HTML(page_text)
    div_list = tree.xpath('//body/div[3]/div[1]/div[@class="card zuozhe_card"]')
    print(div_list)

    # First text node under each card's <h3>; cards that have no heading
    # text are skipped rather than aborting the run with an IndexError.
    headings = (first_or_none(div.xpath("./div[2]/h3//text()")) for div in div_list)
    context_list = [text for text in headings if text is not None]
    print(context_list)


if __name__ == '__main__':
    main()

    # # Scrape the page title from the 58.com (Beijing) second-hand housing listings
    # url = "https://bj.58.com/ershoufang/"
    # headers = {
    #     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
    # page_text = requests.get(url=url, headers=headers).text
    #
    # tree = etree.HTML(page_text)
    # r = tree.xpath('//title/text()')
    # print(r)
    #
    # # print(dir(r[1]))
